
# Table 2 - Comparing PSM + Number of phosphorylations across data sets #
#########################################################################

#> [conflicted] Will prefer dplyr::filter over any other package
suppressPackageStartupMessages(library("tidyverse"))

library(dplyr)
library(stringr)
library(useful)
library(MASS)
library(reshape2)
library(epiDisplay)


# Load the PSM/site tables of the eight rice data sets for each pipeline
# (TPP, PD, MQ). Every file sits under one root directory laid out as
#   <root>/<pipeline>/Rice/<pipeline>_<dataset>_A_PSMSITE.csv
# so the root is defined once here; edit this single line to run the script
# against a copy of the data stored somewhere else.
psm_data_root <- "D:/Pipeline comparisons/Writing/Data"

# Read the PSM/site CSV of one pipeline and one data set.
#   pipeline: folder / file prefix of the pipeline ("TPP", "PD" or "MQ").
#   dataset:  data set accession without the "_A" suffix, e.g. "PXD000923".
#   root:     directory that contains the per-pipeline folders.
# Returns the data frame produced by read.csv().
read_psm_site <- function(pipeline, dataset, root = psm_data_root) {
  csv_name <- paste0(pipeline, "_", dataset, "_A_PSMSITE.csv")
  read.csv(file.path(root, pipeline, "Rice", csv_name))
}

PXD000923A_TPP <- read_psm_site("TPP", "PXD000923")
PXD002222A_TPP <- read_psm_site("TPP", "PXD002222")
PXD002756A_TPP <- read_psm_site("TPP", "PXD002756")
PXD004705A_TPP <- read_psm_site("TPP", "PXD004705")
PXD004939A_TPP <- read_psm_site("TPP", "PXD004939")
PXD005241A_TPP <- read_psm_site("TPP", "PXD005241")
# Rewrite the Spectrum identifiers of this data set only: drop the first
# "_raw", then strip every remaining underscore.
PXD005241A_TPP$Spectrum <- str_remove(PXD005241A_TPP$Spectrum, '_raw')
PXD005241A_TPP$Spectrum <- str_remove_all(PXD005241A_TPP$Spectrum, '_')
PXD012764A_TPP <- read_psm_site("TPP", "PXD012764")
PXD019291A_TPP <- read_psm_site("TPP", "PXD019291")


PXD000923A_PD <- read_psm_site("PD", "PXD000923")
PXD002222A_PD <- read_psm_site("PD", "PXD002222")
PXD002756A_PD <- read_psm_site("PD", "PXD002756")
PXD004705A_PD <- read_psm_site("PD", "PXD004705")
PXD004939A_PD <- read_psm_site("PD", "PXD004939")
PXD005241A_PD <- read_psm_site("PD", "PXD005241")
PXD012764A_PD <- read_psm_site("PD", "PXD012764")
PXD019291A_PD <- read_psm_site("PD", "PXD019291")


PXD000923A_MQ <- read_psm_site("MQ", "PXD000923")
PXD002222A_MQ <- read_psm_site("MQ", "PXD002222")
PXD002756A_MQ <- read_psm_site("MQ", "PXD002756")
PXD004705A_MQ <- read_psm_site("MQ", "PXD004705")
PXD004939A_MQ <- read_psm_site("MQ", "PXD004939")
PXD005241A_MQ <- read_psm_site("MQ", "PXD005241")
PXD012764A_MQ <- read_psm_site("MQ", "PXD012764")
PXD019291A_MQ <- read_psm_site("MQ", "PXD019291")

# Add the Seq_nPhospho comparison key to a TPP table: the Peptide value joined
# by "_" to the number of "Phospho" occurrences found in Peptide_mod.
# Returns the table with the extra column.
add_tpp_seq_nphospho <- function(psm) {
  n_phospho <- str_count(psm$Peptide_mod, "Phospho")
  psm$Seq_nPhospho <- paste0(psm$Peptide, "_", n_phospho)
  psm
}

PXD000923A_TPP <- add_tpp_seq_nphospho(PXD000923A_TPP)
PXD002222A_TPP <- add_tpp_seq_nphospho(PXD002222A_TPP)
PXD002756A_TPP <- add_tpp_seq_nphospho(PXD002756A_TPP)
PXD004705A_TPP <- add_tpp_seq_nphospho(PXD004705A_TPP)
PXD004939A_TPP <- add_tpp_seq_nphospho(PXD004939A_TPP)
PXD005241A_TPP <- add_tpp_seq_nphospho(PXD005241A_TPP)
PXD012764A_TPP <- add_tpp_seq_nphospho(PXD012764A_TPP)
PXD019291A_TPP <- add_tpp_seq_nphospho(PXD019291A_TPP)

# Add the Seq_nPhospho comparison key to a PD table: the Sequence value joined
# by "_" to the n_Phos column already present in the table.
# Returns the table with the extra column.
add_pd_seq_nphospho <- function(psm) {
  psm$Seq_nPhospho <- paste0(psm$Sequence, "_", psm$n_Phos)
  psm
}

PXD000923A_PD <- add_pd_seq_nphospho(PXD000923A_PD)
PXD002222A_PD <- add_pd_seq_nphospho(PXD002222A_PD)
PXD002756A_PD <- add_pd_seq_nphospho(PXD002756A_PD)
PXD004705A_PD <- add_pd_seq_nphospho(PXD004705A_PD)
PXD004939A_PD <- add_pd_seq_nphospho(PXD004939A_PD)
PXD005241A_PD <- add_pd_seq_nphospho(PXD005241A_PD)
PXD012764A_PD <- add_pd_seq_nphospho(PXD012764A_PD)
PXD019291A_PD <- add_pd_seq_nphospho(PXD019291A_PD)

# Add the Seq_nPhospho comparison key to an MQ table: the Sequence value joined
# by "_" to the number of "Phospho" occurrences found in Modified.sequence.
# Returns the table with the extra column.
add_mq_seq_nphospho <- function(psm) {
  n_phospho <- str_count(psm$Modified.sequence, "Phospho")
  psm$Seq_nPhospho <- paste0(psm$Sequence, "_", n_phospho)
  psm
}

PXD000923A_MQ <- add_mq_seq_nphospho(PXD000923A_MQ)
PXD002222A_MQ <- add_mq_seq_nphospho(PXD002222A_MQ)
PXD002756A_MQ <- add_mq_seq_nphospho(PXD002756A_MQ)
PXD004705A_MQ <- add_mq_seq_nphospho(PXD004705A_MQ)
PXD004939A_MQ <- add_mq_seq_nphospho(PXD004939A_MQ)
PXD005241A_MQ <- add_mq_seq_nphospho(PXD005241A_MQ)
PXD012764A_MQ <- add_mq_seq_nphospho(PXD012764A_MQ)
PXD019291A_MQ <- add_mq_seq_nphospho(PXD019291A_MQ)

# PSM count at 1% FDR #


# Number of distinct Spectrum values in a TPP table (one count per data set).
count_tpp_psms <- function(psm) {
  sum(!duplicated(psm$Spectrum))
}

PXD000923A_TPP_PSM <- count_tpp_psms(PXD000923A_TPP)
PXD002222A_TPP_PSM <- count_tpp_psms(PXD002222A_TPP)
PXD002756A_TPP_PSM <- count_tpp_psms(PXD002756A_TPP)
PXD004705A_TPP_PSM <- count_tpp_psms(PXD004705A_TPP)
PXD004939A_TPP_PSM <- count_tpp_psms(PXD004939A_TPP)
PXD005241A_TPP_PSM <- count_tpp_psms(PXD005241A_TPP)
PXD012764A_TPP_PSM <- count_tpp_psms(PXD012764A_TPP)
PXD019291A_TPP_PSM <- count_tpp_psms(PXD019291A_TPP)

# Number of distinct File.ID / First.Scan combinations in a PD table. The two
# columns are pasted into a single "<File.ID>_<First.Scan>" key before
# de-duplication.
count_pd_psms <- function(psm) {
  scan_keys <- paste0(psm$File.ID, "_", psm$First.Scan)
  sum(!duplicated(scan_keys))
}

PXD000923A_PD_PSM <- count_pd_psms(PXD000923A_PD)
PXD002222A_PD_PSM <- count_pd_psms(PXD002222A_PD)
PXD002756A_PD_PSM <- count_pd_psms(PXD002756A_PD)
PXD004705A_PD_PSM <- count_pd_psms(PXD004705A_PD)
PXD004939A_PD_PSM <- count_pd_psms(PXD004939A_PD)
PXD005241A_PD_PSM <- count_pd_psms(PXD005241A_PD)
PXD012764A_PD_PSM <- count_pd_psms(PXD012764A_PD)
PXD019291A_PD_PSM <- count_pd_psms(PXD019291A_PD)


# Number of distinct Spectrum / MS.MS.IDs combinations in an MQ table. The two
# columns are pasted into a single "<Spectrum>_<MS.MS.IDs>" key before
# de-duplication.
count_mq_psms <- function(psm) {
  spectrum_keys <- paste0(psm$Spectrum, "_", psm$MS.MS.IDs)
  sum(!duplicated(spectrum_keys))
}

PXD000923A_MQ_PSM <- count_mq_psms(PXD000923A_MQ)
PXD002222A_MQ_PSM <- count_mq_psms(PXD002222A_MQ)
PXD002756A_MQ_PSM <- count_mq_psms(PXD002756A_MQ)
PXD004705A_MQ_PSM <- count_mq_psms(PXD004705A_MQ)
PXD004939A_MQ_PSM <- count_mq_psms(PXD004939A_MQ)
PXD005241A_MQ_PSM <- count_mq_psms(PXD005241A_MQ)
PXD012764A_MQ_PSM <- count_mq_psms(PXD012764A_MQ)
PXD019291A_MQ_PSM <- count_mq_psms(PXD019291A_MQ)

# Unique peptide sequence + nPhospho keys (Seq_nPhospho) per data set and pipeline


# For every TPP table keep one row per distinct Seq_nPhospho key and tag the
# rows with pipeline = "TPP" so the source is still known after merging.
Unique_PXD000923A_TPP <- PXD000923A_TPP %>%
  distinct(Seq_nPhospho) %>%
  cbind.data.frame(pipeline = "TPP")
Unique_PXD002222A_TPP <- PXD002222A_TPP %>%
  distinct(Seq_nPhospho) %>%
  cbind.data.frame(pipeline = "TPP")
Unique_PXD002756A_TPP <- PXD002756A_TPP %>%
  distinct(Seq_nPhospho) %>%
  cbind.data.frame(pipeline = "TPP")
Unique_PXD004705A_TPP <- PXD004705A_TPP %>%
  distinct(Seq_nPhospho) %>%
  cbind.data.frame(pipeline = "TPP")
Unique_PXD004939A_TPP <- PXD004939A_TPP %>%
  distinct(Seq_nPhospho) %>%
  cbind.data.frame(pipeline = "TPP")
Unique_PXD005241A_TPP <- PXD005241A_TPP %>%
  distinct(Seq_nPhospho) %>%
  cbind.data.frame(pipeline = "TPP")
Unique_PXD012764A_TPP <- PXD012764A_TPP %>%
  distinct(Seq_nPhospho) %>%
  cbind.data.frame(pipeline = "TPP")
Unique_PXD019291A_TPP <- PXD019291A_TPP %>%
  distinct(Seq_nPhospho) %>%
  cbind.data.frame(pipeline = "TPP")

# For every PD table keep one row per distinct Seq_nPhospho key and tag the
# rows with pipeline = "PD" so the source is still known after merging.
Unique_PXD000923A_PD <- PXD000923A_PD %>%
  distinct(Seq_nPhospho) %>%
  cbind.data.frame(pipeline = "PD")
Unique_PXD002222A_PD <- PXD002222A_PD %>%
  distinct(Seq_nPhospho) %>%
  cbind.data.frame(pipeline = "PD")
Unique_PXD002756A_PD <- PXD002756A_PD %>%
  distinct(Seq_nPhospho) %>%
  cbind.data.frame(pipeline = "PD")
Unique_PXD004705A_PD <- PXD004705A_PD %>%
  distinct(Seq_nPhospho) %>%
  cbind.data.frame(pipeline = "PD")
Unique_PXD004939A_PD <- PXD004939A_PD %>%
  distinct(Seq_nPhospho) %>%
  cbind.data.frame(pipeline = "PD")
Unique_PXD005241A_PD <- PXD005241A_PD %>%
  distinct(Seq_nPhospho) %>%
  cbind.data.frame(pipeline = "PD")
Unique_PXD012764A_PD <- PXD012764A_PD %>%
  distinct(Seq_nPhospho) %>%
  cbind.data.frame(pipeline = "PD")
Unique_PXD019291A_PD <- PXD019291A_PD %>%
  distinct(Seq_nPhospho) %>%
  cbind.data.frame(pipeline = "PD")

# For every MQ table keep one row per distinct Seq_nPhospho key and tag the
# rows with pipeline = "MQ" so the source is still known after merging.
Unique_PXD000923A_MQ <- PXD000923A_MQ %>%
  distinct(Seq_nPhospho) %>%
  cbind.data.frame(pipeline = "MQ")
Unique_PXD002222A_MQ <- PXD002222A_MQ %>%
  distinct(Seq_nPhospho) %>%
  cbind.data.frame(pipeline = "MQ")
Unique_PXD002756A_MQ <- PXD002756A_MQ %>%
  distinct(Seq_nPhospho) %>%
  cbind.data.frame(pipeline = "MQ")
Unique_PXD004705A_MQ <- PXD004705A_MQ %>%
  distinct(Seq_nPhospho) %>%
  cbind.data.frame(pipeline = "MQ")
Unique_PXD004939A_MQ <- PXD004939A_MQ %>%
  distinct(Seq_nPhospho) %>%
  cbind.data.frame(pipeline = "MQ")
Unique_PXD005241A_MQ <- PXD005241A_MQ %>%
  distinct(Seq_nPhospho) %>%
  cbind.data.frame(pipeline = "MQ")
Unique_PXD012764A_MQ <- PXD012764A_MQ %>%
  distinct(Seq_nPhospho) %>%
  cbind.data.frame(pipeline = "MQ")
Unique_PXD019291A_MQ <- PXD019291A_MQ %>%
  distinct(Seq_nPhospho) %>%
  cbind.data.frame(pipeline = "MQ")

# Merging data #
################

# Full outer merge of two key tables on Seq_nPhospho: keys present in only one
# of the inputs are kept, with NA in the columns of the other input.
full_merge_by_key <- function(left, right) {
  merge(left, right, by = "Seq_nPhospho", all = TRUE)
}

# Per data set: merge TPP with PD first (M1_*), then add MQ (Merged_*). After
# both steps the pipeline tags sit in pipeline.x (TPP), pipeline.y (PD) and
# pipeline (MQ).
M1_PXD000923A <- full_merge_by_key(Unique_PXD000923A_TPP, Unique_PXD000923A_PD)
Merged_PXD000923A <- full_merge_by_key(M1_PXD000923A, Unique_PXD000923A_MQ)

M1_PXD002222A <- full_merge_by_key(Unique_PXD002222A_TPP, Unique_PXD002222A_PD)
Merged_PXD002222A <- full_merge_by_key(M1_PXD002222A, Unique_PXD002222A_MQ)

M1_PXD002756A <- full_merge_by_key(Unique_PXD002756A_TPP, Unique_PXD002756A_PD)
Merged_PXD002756A <- full_merge_by_key(M1_PXD002756A, Unique_PXD002756A_MQ)

M1_PXD004705A <- full_merge_by_key(Unique_PXD004705A_TPP, Unique_PXD004705A_PD)
Merged_PXD004705A <- full_merge_by_key(M1_PXD004705A, Unique_PXD004705A_MQ)

M1_PXD004939A <- full_merge_by_key(Unique_PXD004939A_TPP, Unique_PXD004939A_PD)
Merged_PXD004939A <- full_merge_by_key(M1_PXD004939A, Unique_PXD004939A_MQ)

M1_PXD005241A <- full_merge_by_key(Unique_PXD005241A_TPP, Unique_PXD005241A_PD)
Merged_PXD005241A <- full_merge_by_key(M1_PXD005241A, Unique_PXD005241A_MQ)

M1_PXD012764A <- full_merge_by_key(Unique_PXD012764A_TPP, Unique_PXD012764A_PD)
Merged_PXD012764A <- full_merge_by_key(M1_PXD012764A, Unique_PXD012764A_MQ)

M1_PXD019291A <- full_merge_by_key(Unique_PXD019291A_TPP, Unique_PXD019291A_PD)
Merged_PXD019291A <- full_merge_by_key(M1_PXD019291A, Unique_PXD019291A_MQ)

# Build the Group label of every row of a merged table. The three pipeline
# tags (pipeline.x, pipeline.y, pipeline) are pasted together with "_"; a tag
# that is missing for a row is pasted as the text "NA", and the two gsub()
# passes then delete those "NA" pieces together with their separator, leaving
# only the pipelines that reported the key (e.g. "TPP_MQ").
pipeline_group_label <- function(merged) {
  pasted <- paste0(merged$pipeline.x, "_", merged$pipeline.y, "_", merged$pipeline)
  no_trailing_na <- gsub('_NA', '', pasted)
  gsub('NA_', '', no_trailing_na)
}

Merged_PXD000923A$Group <- pipeline_group_label(Merged_PXD000923A)
Merged_PXD002222A$Group <- pipeline_group_label(Merged_PXD002222A)
Merged_PXD002756A$Group <- pipeline_group_label(Merged_PXD002756A)
Merged_PXD004705A$Group <- pipeline_group_label(Merged_PXD004705A)
Merged_PXD004939A$Group <- pipeline_group_label(Merged_PXD004939A)
Merged_PXD005241A$Group <- pipeline_group_label(Merged_PXD005241A)
Merged_PXD012764A$Group <- pipeline_group_label(Merged_PXD012764A)
Merged_PXD019291A$Group <- pipeline_group_label(Merged_PXD019291A)

# Tabulate the Group labels of each merged data set, i.e. how many
# Seq_nPhospho keys fall into each combination of pipelines.
# NOTE(review): tab1() is presumably epiDisplay::tab1 (epiDisplay is attached
# at the top of the file) - confirm. The calls are left as separate top-level
# statements on purpose: wrapping them in a loop or helper would stop the
# results from being auto-printed and could change any label tab1() derives
# from the argument expression.
tab1(Merged_PXD000923A$Group)
tab1(Merged_PXD002222A$Group)
tab1(Merged_PXD002756A$Group)
tab1(Merged_PXD004705A$Group)
tab1(Merged_PXD004939A$Group)
tab1(Merged_PXD005241A$Group)
tab1(Merged_PXD012764A$Group)
tab1(Merged_PXD019291A$Group)


